from selenium import webdriver
import time
import urllib.request
import re
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

def loadfile():
    """Read the list of pages to crawl from ``111.txt``.

    Each non-blank line of the file must contain a URL and a name separated
    by an ASCII comma (``url,name``). Blank lines are skipped.

    Returns:
        A ``(urls, names)`` tuple of two parallel lists, in file order.

    Raises:
        IndexError: if a non-blank line does not contain a comma.
        OSError: if ``111.txt`` cannot be opened.
    """
    names = []
    urls = []
    # Change the file name here. When writing the file yourself, separate
    # the URL and the name with an ASCII (English input method) comma.
    with open("111.txt") as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank or trailing empty line would otherwise crash on
                # the missing second field.
                continue
            spline = line.split(',')
            names.append(spline[1])
            urls.append(spline[0])
    return urls, names

def download(url):
    """Fetch ``url`` with a desktop-browser User-Agent and return the raw body.

    Args:
        url: The address to fetch; any scheme supported by the default
            ``urllib.request`` opener.

    Returns:
        The response body as ``bytes``.

    Raises:
        urllib.error.URLError: if the request fails.
    """
    headers = ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0")
    opener = urllib.request.build_opener()
    opener.addheaders = [headers]
    # Close the response deterministically instead of leaving the
    # connection open until garbage collection.
    with opener.open(url) as response:
        return response.read()

def save(filename, data, lujing):
    """Write ``data`` to the file ``lujing + filename`` in binary mode.

    Args:
        filename: Name of the file to create (an existing file is overwritten).
        data: The ``bytes`` to write.
        lujing: Directory prefix. It is concatenated with ``filename`` as-is,
            so it must already end with a path separator.

    Raises:
        OSError: if the target file cannot be opened or written.
    """
    # The context manager closes the handle even when the write fails.
    with open(lujing + filename, 'wb') as f:
        f.write(data)


def downloadsina(url, filename, address, scroll_steps=5, scroll_px=1080, delay=1):
    """Render a Weibo page in PhantomJS and store a screenshot and its HTML.

    The page is scrolled down several times so that more content gets a
    chance to load, then ``address + filename + '.jpg'`` (screenshot) and
    ``address + filename + '.html'`` (UTF-8 page source) are written.

    Note: per the original author, only the mobile touch version of Weibo
    works (start from m.weibo.cn and search for the account you want).

    Args:
        url: Address of the Weibo page to crawl.
        filename: Base name (no extension) for the two output files.
        address: Output directory prefix; concatenated as-is, so it must end
            with a path separator.
        scroll_steps: Number of scroll iterations.
        scroll_px: Scroll unit in pixels; adjust to your screen resolution.
            Step ``i`` scrolls by ``i * scroll_px``.
        delay: Seconds to wait after each scroll. Increase it on a slow
            connection so the content has time to load.
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    # GhostDriver reads page settings from "phantomjs.page.settings.*"; the
    # previous key ("...page.setting...") was misspelled, so the custom
    # User-Agent was never applied.
    dcap["phantomjs.page.settings.userAgent"] = ("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0")
    browser = webdriver.PhantomJS(desired_capabilities=dcap)
    try:
        browser.get(url)
        picname = address + filename + '.jpg'
        htmlname = address + filename + '.html'
        for i in range(scroll_steps):
            # scrollBy takes (x, y); the offset must be passed as y to
            # scroll vertically.
            js = 'window.scrollBy(0,' + str(i * scroll_px) + ')'
            browser.execute_script(js)
            # Give the page time to load the newly revealed content.
            time.sleep(delay)
        browser.get_screenshot_as_file(picname)
        data = browser.page_source
        with open(htmlname, "w", encoding="utf-8") as f:
            f.write(data)
    finally:
        # Always shut the PhantomJS process down, even when a step fails.
        browser.quit()
    
def spider(workdir="D:/"):
    """Crawl every page listed by ``loadfile`` and download its images.

    For each (url, name) pair the page is rendered and saved through
    ``downloadsina``; the saved HTML is then scanned for picture URLs and
    for video cover images (CSS ``background-image`` URLs), which are
    downloaded into two user-supplied directories.

    Args:
        workdir: Directory prefix where the intermediate screenshot and HTML
            files are stored. Concatenated as-is, so it must end with a
            slash. Defaults to ``"D:/"``.
    """
    # Local import keeps this function self-contained.
    from html import unescape

    urls, names = loadfile()
    # Picture directory, e.g. D:/123/ -- the trailing slash is required.
    piclujing = input("请输入图片存储路径：")
    # Directory for the video cover (screenshot) images.
    splujing = input("请输入视频图片存储路径:")

    # Compile once; the patterns do not change between pages.
    pic_pattern = re.compile(r'm-imghold-square"><img src="(.*?)"></div></li>')
    # Parentheses are escaped so the group captures only the URL itself.
    cover_pattern = re.compile(r'style="background-image: url\((.*?)\);">')

    for url, name in zip(urls, names):
        print(url)
        print(name)
        downloadsina(url, name, workdir)
        # Read the saved page straight from disk rather than through a
        # hard-coded file:// URL.
        with open(workdir + name + ".html", encoding="utf-8", errors="ignore") as f:
            page = f.read()

        for raw in pic_pattern.findall(page):
            # Serialized HTML escapes characters such as "&" in attribute
            # values; decode them before requesting the URL.
            pic_url = unescape(raw)
            filename = pic_url.split('/')[-1]
            save(filename, download(pic_url), piclujing)

        for raw in cover_pattern.findall(page):
            # The CSS url(...) value may be wrapped in quotes.
            cover_url = unescape(raw).strip('\'"')
            filename = cover_url.split('/')[-1]
            save(filename, download(cover_url), splujing)


# Start the crawl only when this file is run as a script, not when imported.
if __name__ == "__main__":
    spider()
